from lib2to3.pgen2 import driver
import  time,pymysql,pandas,re
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from datetime import datetime
# from crawlab import save_item
# MySQL connection settings for the crawl results database.
# NOTE(review): credentials are hard-coded — consider moving them to env vars.
settings = { 'host':'172.16.10.201','user':'zy001','port':3306,'password':'zy@123','database':'crawl-bk','charset' : 'utf8'}
conn = pymysql.connect(**settings)
print('##################################################数据库连接成功##################################################')
cursor = conn.cursor()
print('##################################################获取游标成功##################################################')

# Chrome options shared by both crawl stages.
option = ChromeOptions()
# Disable image loading (value 2 = block) to speed up page fetches.
prefs = {
        'profile.default_content_setting_values': {
            'images': 2
        }
    }
# option.headless = True
option.add_experimental_option('prefs', prefs)
# Hide the "controlled by automated software" banner / automation switch
# to reduce the chance of triggering Anjuke's bot detection.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_argument('--ignore-certificate-errors')
print('##################################################浏览器加载成功##################################################')


# Entry listing URL per district (keys are district names used as the
# `region` column when rows are saved).
area_url = {
    '五华':'https://km.sydc.anjuke.com/sp-zu/wuhua/',
    '盘龙': 'https://km.sydc.anjuke.com/sp-zu/panlong/',
    '官渡':'https://km.sydc.anjuke.com/sp-zu/guandu/',
    '西山':'https://km.sydc.anjuke.com/sp-zu/xishan/',
    '呈贡':'https://km.sydc.anjuke.com/sp-zu/chenggong/',
    '安宁':'https://km.sydc.anjuke.com/sp-zu/anningshikm/',
    '大理':'https://da.sydc.anjuke.com/sp-zu/?kw='
}

# driver.get('https://km.sydc.anjuke.com/sp-zu/wuhua/')

def save_list(driver,k):
    """Persist every listing link found on the driver's current page.

    Collects the detail-page anchors from the result list, pairs each
    href with the current page URL, the region name ``k`` and a
    timestamp, then bulk-inserts the rows through the module-level
    cursor/connection.

    Returns a short Chinese status string with the number of links saved.
    """
    insert_sql = "insert into anjuke_rent_shop(list_url,url,region,create_list_time) values(%s,%s,%s,%s)"
    anchors = driver.find_elements_by_xpath('//div[@class="list-left"]/div[@class="list-item"]/a')
    rows = [
        (driver.current_url, anchor.get_attribute('href'), k, datetime.now())
        for anchor in anchors
    ]
    cursor.executemany(insert_sql, rows)
    conn.commit()
    return '成功获取' + str(len(anchors)) + '条数据'

def get_list():
    """Stage 1: crawl every district's listing pages and store the URLs.

    Opens each entry URL from ``area_url``, saves the links on the first
    page, then follows the "next page" button (``a.aNxt``) until it
    disappears.  A current URL containing 'callback' indicates Anjuke's
    anti-bot verification page, so the loop pauses before scraping —
    presumably to let the check be solved manually (TODO confirm).
    """
    driver = webdriver.Chrome(executable_path='chromedriver', options=option)
    try:
        for region, entry_url in area_url.items():
            driver.get(entry_url)
            if 'callback' in driver.current_url:
                time.sleep(10)
            print(save_list(driver, region))
            # Paginate: re-query the "next" button after every click,
            # since the old element goes stale on navigation.
            next_btn = driver.find_elements_by_xpath('//div[@class="page-content"]/a[@class="aNxt"]')
            while next_btn:
                next_btn[0].click()
                if 'callback' in driver.current_url:
                    time.sleep(10)
                print(save_list(driver, region))
                next_btn = driver.find_elements_by_xpath('//div[@class="page-content"]/a[@class="aNxt"]')
    finally:
        # Bug fix: the browser was never closed before (driver.close()
        # was commented out), leaking a Chrome process per run.  Always
        # quit, even when an exception aborts the crawl.
        driver.quit()


def save_detail():
    """Stage 2: fetch each stored listing URL and fill in detail columns.

    Selects rows whose ``create_detail_time`` is NULL, scrapes the
    label/value pairs from the detail page's basic-info section, maps
    the Chinese labels to column names via ``dic``, and writes the
    values back with a single parameterized UPDATE per row.

    Returns a short Chinese completion message.
    """
    driver = webdriver.Chrome(executable_path='chromedriver', options=option)
    # column name -> Chinese label shown on the detail page
    dic = {'month_rent':'月租','payment':'押付','min_rent_time':'起租期',
           'management_status':'经营状态','location_number':'工位数','building_names':'楼盘','address':'地址',
           'shop_qualitative':'商铺性质','area':'面积','shop_type':'商铺类型','layer_info':'楼层','specification':'规格',
           'crowd':'人群','manage':'物业','transfer_fee':'转让费'}
    sql = 'select url from anjuke_rent_shop where create_detail_time is  null order by id desc'
    cursor.execute(sql)
    datas = cursor.fetchall()
    for data in datas:
        try:
            url = data[0]
            print(url)
            driver.get(url)
            # 'callback' in the URL means an anti-bot verification page;
            # wait before reading the DOM.
            if 'callback' in driver.current_url:
                time.sleep(10)
            key = [i.text for i in driver.find_elements_by_xpath('//div[@id="basic-info"]/div/span[@class="title"]')]
            val = [i.text for i in driver.find_elements_by_xpath('//div[@id="basic-info"]/div/span[@class="value"]')]
            # Match each known label against the scraped titles and
            # collect "col=%s" assignments with their values.
            assignments = []
            params = []
            for col, label in dic.items():
                for title in key:
                    if label in title:
                        assignments.append(col + '=%s')
                        params.append(val[key.index(title)])
            facility_nodes = driver.find_elements_by_xpath('//ul[@class="peitao-content"]')
            supporting_facility = facility_nodes[0].text if facility_nodes else '暂无数据'
            agency_nodes = driver.find_elements_by_xpath('//div[@class="company"]/a')
            agency = agency_nodes[0].text if agency_nodes else '暂无数据'
            assignments += ['supporting_facility=%s', 'agency=%s', 'create_detail_time=%s']
            params += [supporting_facility, agency, datetime.now()]
            # Bug fix: the UPDATE used to be built by splicing raw page
            # text into the SQL string (it broke on any value containing
            # a quote and was injection-prone), and the "where" keyword
            # had no leading space.  Use placeholders instead.
            update_sql = 'UPDATE anjuke_rent_shop set ' + ','.join(assignments) + ' where url=%s'
            cursor.execute(update_sql, params + [url])
            conn.commit()
        except Exception as e:
            # Best-effort crawl: log the failure and back off, then move
            # on so one bad page doesn't abort the whole run.
            print(e)
            time.sleep(10)
    return '完成爬取'

if __name__ == '__main__':
    # Stage 1 (run separately): collect listing URLs into the table.
    # get_list()
    # Stage 2: enrich each stored URL with detail-page fields.
    save_detail()

    # cursor.close()
    # conn.close()